package org.wisdomdata.selenium;

import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.logging.Logger;

import org.openqa.selenium.SearchContext;
import org.openqa.selenium.WebDriver;
import org.springframework.beans.factory.annotation.Autowired;
import org.wisdomdata.common.AbstractProcessor;
import org.wisdomdata.framework.TaskCommon;


/**
 * Regular Selenium processor: runs a series of actions and finally produces
 * one target result.
 *
 * <p>{@link #process()} drives the life cycle: {@link #prepareProcess()},
 * then {@code innerProcess()} (not declared in this class), then
 * {@link #quitProcess()}. When {@code shouldGoBack} is enabled the URL that
 * was current on entry is pushed onto the stack held by {@link TaskCommon}
 * and the driver is sent back to it on exit.
 */
public abstract class SeleniumProcessor extends AbstractProcessor{
	private static final Logger logger = 
			Logger.getLogger(SeleniumProcessor.class.getName());

	/** Whether the entry URL is recorded on entry and reloaded on exit; defaults to {@code true}. */
	private boolean shouldGoBack = true;

	@Autowired
	private TaskCommon taskCommon;

	/**
	 * Every processor is followed by a number of extraction tasks. These tasks
	 * are ordered and must be executed strictly in list order.
	 * @author Clebeg
	 * @version v1
	 * */
	private List<SeleniumStringExtractor> extractors;

	/** The driver (search context) the crawl runs against. */
	@Autowired
	private SearchContext searchContext;

	public boolean isShouldGoBack() {
		return shouldGoBack;
	}
	public void setShouldGoBack(boolean shouldGoBack) {
		this.shouldGoBack = shouldGoBack;
	}

	public TaskCommon getTaskCommon() {
		return taskCommon;
	}
	public void setTaskCommon(TaskCommon taskCommon) {
		this.taskCommon = taskCommon;
	}

	public List<SeleniumStringExtractor> getExtractors() {
		return extractors;
	}
	public void setExtractors(List<SeleniumStringExtractor> extractors) {
		this.extractors = extractors;
	}

	public SearchContext getSearchContext() {
		return searchContext;
	}
	public void setSearchContext(SearchContext searchContext) {
		this.searchContext = searchContext;
	}

	/**
	 * Checks the preconditions for processing and, when {@code shouldGoBack}
	 * is enabled, records the current URL on the {@link TaskCommon} stack.
	 *
	 * @return {@code false} (after logging a warning) when no search context
	 *         is set, or when going back is requested but the search context
	 *         is not a {@link WebDriver}; {@code true} otherwise
	 */
	public boolean prepareProcess() {
		SearchContext context = this.getSearchContext();
		if (context == null) {
			logger.warning("the selenium must init web driver, please check it!");
			return false;
		}
		// Remember the page that is being processed right now.
		if (isShouldGoBack()) {
			// Only a WebDriver knows its current URL; reject other contexts
			// up front instead of failing with a ClassCastException.
			if (!(context instanceof WebDriver)) {
				logger.warning("shouldGoBack requires the search context to be a WebDriver, but got "
						+ context.getClass().getName());
				return false;
			}
			this.getTaskCommon().stack.push(((WebDriver) context).getCurrentUrl());
		}
		return true;
	}

	/**
	 * Runs the processor: prepare, {@code innerProcess()}, quit. Nothing is
	 * run when {@link #prepareProcess()} returns {@code false}.
	 *
	 * <p>If {@code innerProcess()} throws, the URL pushed during preparation
	 * is still popped and reloaded so the stack stays balanced; the original
	 * exception then propagates and no tables are written.
	 */
	public void process() {
		if (!prepareProcess()) {
			return;
		}
		boolean finished = false;
		try {
			innerProcess();
			finished = true;
		} finally {
			if (!finished) {
				try {
					restorePreviousPage();
				} catch (RuntimeException restoreFailure) {
					// Do not let a failed restore hide the original failure.
					logger.warning("could not restore the previous page after a failed process: "
							+ restoreFailure);
				}
			}
		}
		quitProcess();
	}

	/**
	 * Finishes processing: writes the tables when {@code isNeedStoreDone()}
	 * is true, then reloads the page recorded by {@link #prepareProcess()}
	 * when {@code shouldGoBack} is enabled.
	 */
	public void quitProcess() {
		if (isNeedStoreDone()) {
			this.getTaskCommon().writeTables();
		}
		// On leaving, restore the page that was current on entry.
		restorePreviousPage();
	}

	/** Pops the most recently recorded URL and loads it, if going back is enabled. */
	private void restorePreviousPage() {
		if (isShouldGoBack()) {
			WebDriver driver = (WebDriver) this.getSearchContext();
			driver.get(this.getTaskCommon().stack.pop());
		}
	}

	/**
	 * Runs every configured extractor in order and appends one value per
	 * extractor to this processor's table (one table per page, keyed by
	 * {@code getTableName()}). A {@code null} extraction result is stored as
	 * the string {@code "NULL"}. When {@code isNeedStoreTargetUri()} is true
	 * the current URL is appended to the {@code "CrawlUrl"} column as well.
	 * Does nothing when no extractors are configured.
	 */
	protected void commonExtract() {
		List<SeleniumStringExtractor> tasks = this.getExtractors();
		if (tasks == null) {
			return;
		}
		// Fetch the table that receives the results, creating it on first use.
		String tableName = this.getTableName();
		Map<String, List<String>> table = this.getTaskCommon().tables.get(tableName);
		if (table == null) {
			table = new LinkedHashMap<String, List<String>>();
			this.getTaskCommon().tables.put(tableName, table);
		}
		System.out.println("表 [" + tableName + "] 开始抽取新信息：");
		for (SeleniumStringExtractor task : tasks) {
			// A single extraction attempt; no retry is performed here.
			task.extract();

			String columnName = task.getExtractName();
			List<String> column = columnOf(table, columnName);
			// Keep every column the same length: a missing result becomes "NULL".
			String extracted = task.getExtractResult();
			String value = (extracted != null) ? extracted : "NULL";
			column.add(value);
			System.out.print("{[" + columnName + "]=[" + value + "]} ");
		}
		if (this.isNeedStoreTargetUri()) {
			columnOf(table, "CrawlUrl").add(
					((WebDriver) this.getSearchContext()).getCurrentUrl());
		}
	}

	/** Returns the named column of {@code table}, creating an empty one if absent. */
	private static List<String> columnOf(Map<String, List<String>> table, String columnName) {
		List<String> column = table.get(columnName);
		if (column == null) {
			column = new ArrayList<String>();
			table.put(columnName, column);
		}
		return column;
	}

	/**
	 * Delegates to {@code TaskCommon.getTableNumber(String)}.
	 *
	 * @param tableName name of the table to look up
	 * @return the number reported by {@link TaskCommon} for that table
	 */
	protected int getTableNumber(String tableName) {
		return this.getTaskCommon().getTableNumber(tableName);
	}
}

